package com.mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat;
import org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.net.URI;

/**
 * Driver for a token-counting MapReduce job that reads its input through
 * {@link CombineTextInputFormat}, so that many small files can be packed
 * into fewer input splits.
 *
 * <p>Usage: {@code CombineMapReduce [inputPath [outputPath]]}. Either
 * argument may be omitted, in which case the built-in default location is
 * used. The output path is deleted recursively before the job is submitted
 * if it already exists.
 */
public class CombineMapReduce extends Configured implements Tool {
    /** Input location used when no input argument is supplied. */
    private static final String DEFAULT_INPUT =
            "hdfs://192.168.10.11:9000/indexdata";
    /** Output location used when no output argument is supplied. */
    private static final String DEFAULT_OUTPUT =
            "hdfs://192.168.10.11:9000/combineoutput";
    /** Upper bound passed to the input format for a combined split (1 GiB). */
    private static final long MAX_SPLIT_BYTES = 1024L * 1024 * 1024;
    /** Lower bound passed to the input format for a split (1 KiB). */
    private static final long MIN_SPLIT_BYTES = 1024L;

    /**
     * Configures and runs the job, blocking until it finishes.
     *
     * @param args optional positional arguments: {@code args[0]} is the input
     *             path and {@code args[1]} is the output path; missing entries
     *             fall back to the defaults
     * @return {@code 0} if the job completed successfully, {@code -1} otherwise
     * @throws Exception if filesystem access or job submission fails
     */
    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        // Resolve the input and output paths (arguments override the defaults).
        Path input = new Path(argOrDefault(args, 0, DEFAULT_INPUT));
        Path output = new Path(argOrDefault(args, 1, DEFAULT_OUTPUT));

        // Resolve the filesystem from the output path itself so the cleanup
        // always targets the filesystem the job will write to. The instance
        // is not closed here because the job uses the filesystem afterwards.
        FileSystem fs = output.getFileSystem(conf);
        if (fs.exists(output)) {
            fs.delete(output, true);
        }

        // Build the job.
        Job job = Job.getInstance(conf);
        job.setJobName("combine");
        job.setJarByClass(this.getClass());

        // Use the library word-count mapper: TokenCounterMapper.
        job.setMapperClass(TokenCounterMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // IntSumReducer
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Optimization: combine small files into larger splits up front.
        job.setInputFormatClass(CombineTextInputFormat.class);
        // Set the maximum split size.
        CombineTextInputFormat.setMaxInputSplitSize(job, MAX_SPLIT_BYTES);
        // Set the minimum split size.
        CombineTextInputFormat.setMinInputSplitSize(job, MIN_SPLIT_BYTES);
        CombineTextInputFormat.addInputPath(job, input);

        TextOutputFormat.setOutputPath(job, output);
        return job.waitForCompletion(true) ? 0 : -1;
    }

    /**
     * Returns {@code args[index]} when it is present and non-empty, otherwise
     * {@code fallback}.
     */
    private static String argOrDefault(String[] args, int index, String fallback) {
        if (args == null || index >= args.length) {
            return fallback;
        }
        String value = args[index];
        return (value == null || value.isEmpty()) ? fallback : value;
    }

    /**
     * Command-line entry point; runs this tool through {@link ToolRunner} and
     * exits the JVM with the tool's return code.
     */
    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new CombineMapReduce(), args));
    }
}
